Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
debakarr
GitHub Repository: debakarr/machinelearning
Path: blob/master/Part 2 - Regression/Multiple Linear Regression/[Python] Multiple Linear Regression.ipynb
1009 views
Kernel: Python 3

Multiple Linear Regression

from IPython.display import Image
Image('img/01.png')
Image in a Jupyter notebook
Image('img/02.png')
Image in a Jupyter notebook

Green = Dependent variable

Blue = Independent variable

Image('img/03.png')
Image in a Jupyter notebook

Data Preprocessing

# Importing the libraries import numpy as np import matplotlib.pyplot as plt import pandas as pd from sklearn.preprocessing import LabelEncoder, OneHotEncoder from sklearn.linear_model import LinearRegression import statsmodels.formula.api as sm %matplotlib inline # Importing the dataset dataset = pd.read_csv('50_Startups.csv') X = dataset.iloc[:, :-1].values y = dataset.iloc[:, 4].values
X_train
array([[78389.47, 153773.43, 299737.29, 'New York'], [0.0, 135426.92, 0.0, 'California'], [100671.96, 91790.61, 249744.55, 'California'], [27892.92, 84710.77, 164470.71, 'Florida'], [131876.9, 99814.71, 362861.36, 'New York'], [162597.7, 151377.59, 443898.53, 'California'], [134615.46, 147198.87, 127716.82, 'California'], [72107.6, 127864.55, 353183.81, 'New York'], [0.0, 116983.8, 45173.06, 'California'], [77044.01, 99281.34, 140574.81, 'New York'], [61136.38, 152701.92, 88218.23, 'New York'], [114523.61, 122616.84, 261776.23, 'New York'], [46014.02, 85047.44, 205517.64, 'New York'], [75328.87, 144135.98, 134050.07, 'Florida'], [130298.13, 145530.06, 323876.68, 'Florida'], [76253.86, 113867.3, 298664.47, 'California'], [542.05, 51743.15, 0.0, 'New York'], [144372.41, 118671.85, 383199.62, 'New York'], [67532.53, 105751.03, 304768.73, 'Florida'], [22177.74, 154806.14, 28334.72, 'California'], [142107.34, 91391.77, 366168.42, 'Florida'], [78013.11, 121597.55, 264346.06, 'California'], [28663.76, 127056.21, 201126.82, 'Florida'], [119943.24, 156547.42, 256512.92, 'Florida'], [15505.73, 127382.3, 35534.17, 'New York'], [64664.71, 139553.16, 137962.62, 'California'], [44069.95, 51283.14, 197029.42, 'California'], [38558.51, 82982.09, 174999.3, 'California'], [123334.88, 108679.17, 304981.62, 'California'], [63408.86, 129219.61, 46085.25, 'California'], [55493.95, 103057.49, 214634.81, 'Florida'], [73994.56, 122782.75, 303319.26, 'Florida'], [23640.93, 96189.63, 148001.11, 'California'], [86419.7, 153514.11, 0.0, 'New York'], [94657.16, 145077.58, 282574.31, 'New York'], [20229.59, 65947.93, 185265.1, 'New York'], [46426.07, 157693.92, 210797.67, 'California'], [66051.52, 182645.56, 118148.2, 'Florida'], [153441.51, 101145.55, 407934.54, 'Florida'], [1000.23, 124153.04, 1903.93, 'New York']], dtype=object)
X_test
array([[101913.08, 110594.11, 229160.95, 'Florida'], [91992.39, 135495.07, 252664.93, 'California'], [61994.48, 115641.28, 91131.24, 'Florida'], [1315.46, 115816.21, 297114.46, 'Florida'], [91749.16, 114175.79, 294919.57, 'Florida'], [165349.2, 136897.8, 471784.1, 'New York'], [28754.33, 118546.05, 172795.67, 'California'], [93863.75, 127320.38, 249839.44, 'Florida'], [65605.48, 153032.06, 107138.38, 'New York'], [120542.52, 148718.95, 311613.29, 'New York']], dtype=object)
y_train
array([ 111313.02, 42559.73, 144259.4 , 77798.83, 156991.12, 191792.06, 156122.51, 105008.31, 14681.4 , 108552.04, 97483.56, 129917.04, 96479.51, 105733.54, 155752.6 , 118474.03, 35673.41, 182901.99, 108733.99, 65200.33, 166187.94, 126992.93, 90708.19, 132602.65, 69758.98, 107404.34, 89949.14, 81005.76, 149759.96, 97427.84, 96778.92, 110352.25, 71498.49, 122776.86, 125370.37, 81229.06, 96712.8 , 103282.38, 191050.39, 64926.08])
y_test
array([ 146121.95, 134307.35, 99937.59, 49490.75, 124266.9 , 192261.83, 78239.91, 141585.52, 101004.64, 152211.77])

Encoding categorical data

# Encode the 'State' column (index 3) as integers, then one-hot encode it.
labelencoder = LabelEncoder()
X[:, 3] = labelencoder.fit_transform(X[:, 3])
# `categorical_features` was deprecated in scikit-learn 0.20 and removed in
# 0.22, so OneHotEncoder(categorical_features=[3]) no longer runs. Encode just
# the state column and stack the dummy columns in front of the numeric
# columns — the same column order the old API produced.
onehotencoder = OneHotEncoder()
state_dummies = onehotencoder.fit_transform(X[:, [3]]).toarray()
X = np.hstack((state_dummies, X[:, :3])).astype(float)
X
array([[ 0.00000000e+00, 0.00000000e+00, 1.00000000e+00, 1.65349200e+05, 1.36897800e+05, 4.71784100e+05], [ 1.00000000e+00, 0.00000000e+00, 0.00000000e+00, 1.62597700e+05, 1.51377590e+05, 4.43898530e+05], [ 0.00000000e+00, 1.00000000e+00, 0.00000000e+00, 1.53441510e+05, 1.01145550e+05, 4.07934540e+05], [ 0.00000000e+00, 0.00000000e+00, 1.00000000e+00, 1.44372410e+05, 1.18671850e+05, 3.83199620e+05], [ 0.00000000e+00, 1.00000000e+00, 0.00000000e+00, 1.42107340e+05, 9.13917700e+04, 3.66168420e+05], [ 0.00000000e+00, 0.00000000e+00, 1.00000000e+00, 1.31876900e+05, 9.98147100e+04, 3.62861360e+05], [ 1.00000000e+00, 0.00000000e+00, 0.00000000e+00, 1.34615460e+05, 1.47198870e+05, 1.27716820e+05], [ 0.00000000e+00, 1.00000000e+00, 0.00000000e+00, 1.30298130e+05, 1.45530060e+05, 3.23876680e+05], [ 0.00000000e+00, 0.00000000e+00, 1.00000000e+00, 1.20542520e+05, 1.48718950e+05, 3.11613290e+05], [ 1.00000000e+00, 0.00000000e+00, 0.00000000e+00, 1.23334880e+05, 1.08679170e+05, 3.04981620e+05], [ 0.00000000e+00, 1.00000000e+00, 0.00000000e+00, 1.01913080e+05, 1.10594110e+05, 2.29160950e+05], [ 1.00000000e+00, 0.00000000e+00, 0.00000000e+00, 1.00671960e+05, 9.17906100e+04, 2.49744550e+05], [ 0.00000000e+00, 1.00000000e+00, 0.00000000e+00, 9.38637500e+04, 1.27320380e+05, 2.49839440e+05], [ 1.00000000e+00, 0.00000000e+00, 0.00000000e+00, 9.19923900e+04, 1.35495070e+05, 2.52664930e+05], [ 0.00000000e+00, 1.00000000e+00, 0.00000000e+00, 1.19943240e+05, 1.56547420e+05, 2.56512920e+05], [ 0.00000000e+00, 0.00000000e+00, 1.00000000e+00, 1.14523610e+05, 1.22616840e+05, 2.61776230e+05], [ 1.00000000e+00, 0.00000000e+00, 0.00000000e+00, 7.80131100e+04, 1.21597550e+05, 2.64346060e+05], [ 0.00000000e+00, 0.00000000e+00, 1.00000000e+00, 9.46571600e+04, 1.45077580e+05, 2.82574310e+05], [ 0.00000000e+00, 1.00000000e+00, 0.00000000e+00, 9.17491600e+04, 1.14175790e+05, 2.94919570e+05], [ 0.00000000e+00, 0.00000000e+00, 1.00000000e+00, 8.64197000e+04, 1.53514110e+05, 0.00000000e+00], [ 
1.00000000e+00, 0.00000000e+00, 0.00000000e+00, 7.62538600e+04, 1.13867300e+05, 2.98664470e+05], [ 0.00000000e+00, 0.00000000e+00, 1.00000000e+00, 7.83894700e+04, 1.53773430e+05, 2.99737290e+05], [ 0.00000000e+00, 1.00000000e+00, 0.00000000e+00, 7.39945600e+04, 1.22782750e+05, 3.03319260e+05], [ 0.00000000e+00, 1.00000000e+00, 0.00000000e+00, 6.75325300e+04, 1.05751030e+05, 3.04768730e+05], [ 0.00000000e+00, 0.00000000e+00, 1.00000000e+00, 7.70440100e+04, 9.92813400e+04, 1.40574810e+05], [ 1.00000000e+00, 0.00000000e+00, 0.00000000e+00, 6.46647100e+04, 1.39553160e+05, 1.37962620e+05], [ 0.00000000e+00, 1.00000000e+00, 0.00000000e+00, 7.53288700e+04, 1.44135980e+05, 1.34050070e+05], [ 0.00000000e+00, 0.00000000e+00, 1.00000000e+00, 7.21076000e+04, 1.27864550e+05, 3.53183810e+05], [ 0.00000000e+00, 1.00000000e+00, 0.00000000e+00, 6.60515200e+04, 1.82645560e+05, 1.18148200e+05], [ 0.00000000e+00, 0.00000000e+00, 1.00000000e+00, 6.56054800e+04, 1.53032060e+05, 1.07138380e+05], [ 0.00000000e+00, 1.00000000e+00, 0.00000000e+00, 6.19944800e+04, 1.15641280e+05, 9.11312400e+04], [ 0.00000000e+00, 0.00000000e+00, 1.00000000e+00, 6.11363800e+04, 1.52701920e+05, 8.82182300e+04], [ 1.00000000e+00, 0.00000000e+00, 0.00000000e+00, 6.34088600e+04, 1.29219610e+05, 4.60852500e+04], [ 0.00000000e+00, 1.00000000e+00, 0.00000000e+00, 5.54939500e+04, 1.03057490e+05, 2.14634810e+05], [ 1.00000000e+00, 0.00000000e+00, 0.00000000e+00, 4.64260700e+04, 1.57693920e+05, 2.10797670e+05], [ 0.00000000e+00, 0.00000000e+00, 1.00000000e+00, 4.60140200e+04, 8.50474400e+04, 2.05517640e+05], [ 0.00000000e+00, 1.00000000e+00, 0.00000000e+00, 2.86637600e+04, 1.27056210e+05, 2.01126820e+05], [ 1.00000000e+00, 0.00000000e+00, 0.00000000e+00, 4.40699500e+04, 5.12831400e+04, 1.97029420e+05], [ 0.00000000e+00, 0.00000000e+00, 1.00000000e+00, 2.02295900e+04, 6.59479300e+04, 1.85265100e+05], [ 1.00000000e+00, 0.00000000e+00, 0.00000000e+00, 3.85585100e+04, 8.29820900e+04, 1.74999300e+05], [ 1.00000000e+00, 
0.00000000e+00, 0.00000000e+00, 2.87543300e+04, 1.18546050e+05, 1.72795670e+05], [ 0.00000000e+00, 1.00000000e+00, 0.00000000e+00, 2.78929200e+04, 8.47107700e+04, 1.64470710e+05], [ 1.00000000e+00, 0.00000000e+00, 0.00000000e+00, 2.36409300e+04, 9.61896300e+04, 1.48001110e+05], [ 0.00000000e+00, 0.00000000e+00, 1.00000000e+00, 1.55057300e+04, 1.27382300e+05, 3.55341700e+04], [ 1.00000000e+00, 0.00000000e+00, 0.00000000e+00, 2.21777400e+04, 1.54806140e+05, 2.83347200e+04], [ 0.00000000e+00, 0.00000000e+00, 1.00000000e+00, 1.00023000e+03, 1.24153040e+05, 1.90393000e+03], [ 0.00000000e+00, 1.00000000e+00, 0.00000000e+00, 1.31546000e+03, 1.15816210e+05, 2.97114460e+05], [ 1.00000000e+00, 0.00000000e+00, 0.00000000e+00, 0.00000000e+00, 1.35426920e+05, 0.00000000e+00], [ 0.00000000e+00, 0.00000000e+00, 1.00000000e+00, 5.42050000e+02, 5.17431500e+04, 0.00000000e+00], [ 1.00000000e+00, 0.00000000e+00, 0.00000000e+00, 0.00000000e+00, 1.16983800e+05, 4.51730600e+04]])

Avoid dummy variable trap

# Drop one dummy column so the state indicators are not collinear with the
# intercept (the "dummy variable trap"): k categories need only k-1 columns.
X = np.delete(X, 0, axis=1)
# Splitting the dataset into the Training set and Test set (80/20).
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=123)

# Feature scaling is not required here: LinearRegression copes with the
# differing column scales on its own. The original cell kept this snippet as
# a bare triple-quoted string, which Python evaluates and discards on every
# run — real comments make the intent explicit:
# from sklearn.preprocessing import StandardScaler
# sc_X = StandardScaler()
# X_train = sc_X.fit_transform(X_train)
# X_test = sc_X.transform(X_test)
# sc_y = StandardScaler()
# y_train = sc_y.fit_transform(y_train)
X_train
array([[ 0.00000000e+00, 1.00000000e+00, 7.83894700e+04, 1.53773430e+05, 2.99737290e+05], [ 0.00000000e+00, 0.00000000e+00, 0.00000000e+00, 1.35426920e+05, 0.00000000e+00], [ 0.00000000e+00, 0.00000000e+00, 1.00671960e+05, 9.17906100e+04, 2.49744550e+05], [ 1.00000000e+00, 0.00000000e+00, 2.78929200e+04, 8.47107700e+04, 1.64470710e+05], [ 0.00000000e+00, 1.00000000e+00, 1.31876900e+05, 9.98147100e+04, 3.62861360e+05], [ 0.00000000e+00, 0.00000000e+00, 1.62597700e+05, 1.51377590e+05, 4.43898530e+05], [ 0.00000000e+00, 0.00000000e+00, 1.34615460e+05, 1.47198870e+05, 1.27716820e+05], [ 0.00000000e+00, 1.00000000e+00, 7.21076000e+04, 1.27864550e+05, 3.53183810e+05], [ 0.00000000e+00, 0.00000000e+00, 0.00000000e+00, 1.16983800e+05, 4.51730600e+04], [ 0.00000000e+00, 1.00000000e+00, 7.70440100e+04, 9.92813400e+04, 1.40574810e+05], [ 0.00000000e+00, 1.00000000e+00, 6.11363800e+04, 1.52701920e+05, 8.82182300e+04], [ 0.00000000e+00, 1.00000000e+00, 1.14523610e+05, 1.22616840e+05, 2.61776230e+05], [ 0.00000000e+00, 1.00000000e+00, 4.60140200e+04, 8.50474400e+04, 2.05517640e+05], [ 1.00000000e+00, 0.00000000e+00, 7.53288700e+04, 1.44135980e+05, 1.34050070e+05], [ 1.00000000e+00, 0.00000000e+00, 1.30298130e+05, 1.45530060e+05, 3.23876680e+05], [ 0.00000000e+00, 0.00000000e+00, 7.62538600e+04, 1.13867300e+05, 2.98664470e+05], [ 0.00000000e+00, 1.00000000e+00, 5.42050000e+02, 5.17431500e+04, 0.00000000e+00], [ 0.00000000e+00, 1.00000000e+00, 1.44372410e+05, 1.18671850e+05, 3.83199620e+05], [ 1.00000000e+00, 0.00000000e+00, 6.75325300e+04, 1.05751030e+05, 3.04768730e+05], [ 0.00000000e+00, 0.00000000e+00, 2.21777400e+04, 1.54806140e+05, 2.83347200e+04], [ 1.00000000e+00, 0.00000000e+00, 1.42107340e+05, 9.13917700e+04, 3.66168420e+05], [ 0.00000000e+00, 0.00000000e+00, 7.80131100e+04, 1.21597550e+05, 2.64346060e+05], [ 1.00000000e+00, 0.00000000e+00, 2.86637600e+04, 1.27056210e+05, 2.01126820e+05], [ 1.00000000e+00, 0.00000000e+00, 1.19943240e+05, 1.56547420e+05, 2.56512920e+05], 
[ 0.00000000e+00, 1.00000000e+00, 1.55057300e+04, 1.27382300e+05, 3.55341700e+04], [ 0.00000000e+00, 0.00000000e+00, 6.46647100e+04, 1.39553160e+05, 1.37962620e+05], [ 0.00000000e+00, 0.00000000e+00, 4.40699500e+04, 5.12831400e+04, 1.97029420e+05], [ 0.00000000e+00, 0.00000000e+00, 3.85585100e+04, 8.29820900e+04, 1.74999300e+05], [ 0.00000000e+00, 0.00000000e+00, 1.23334880e+05, 1.08679170e+05, 3.04981620e+05], [ 0.00000000e+00, 0.00000000e+00, 6.34088600e+04, 1.29219610e+05, 4.60852500e+04], [ 1.00000000e+00, 0.00000000e+00, 5.54939500e+04, 1.03057490e+05, 2.14634810e+05], [ 1.00000000e+00, 0.00000000e+00, 7.39945600e+04, 1.22782750e+05, 3.03319260e+05], [ 0.00000000e+00, 0.00000000e+00, 2.36409300e+04, 9.61896300e+04, 1.48001110e+05], [ 0.00000000e+00, 1.00000000e+00, 8.64197000e+04, 1.53514110e+05, 0.00000000e+00], [ 0.00000000e+00, 1.00000000e+00, 9.46571600e+04, 1.45077580e+05, 2.82574310e+05], [ 0.00000000e+00, 1.00000000e+00, 2.02295900e+04, 6.59479300e+04, 1.85265100e+05], [ 0.00000000e+00, 0.00000000e+00, 4.64260700e+04, 1.57693920e+05, 2.10797670e+05], [ 1.00000000e+00, 0.00000000e+00, 6.60515200e+04, 1.82645560e+05, 1.18148200e+05], [ 1.00000000e+00, 0.00000000e+00, 1.53441510e+05, 1.01145550e+05, 4.07934540e+05], [ 0.00000000e+00, 1.00000000e+00, 1.00023000e+03, 1.24153040e+05, 1.90393000e+03]])
X_test
array([[ 1.00000000e+00, 0.00000000e+00, 1.01913080e+05, 1.10594110e+05, 2.29160950e+05], [ 0.00000000e+00, 0.00000000e+00, 9.19923900e+04, 1.35495070e+05, 2.52664930e+05], [ 1.00000000e+00, 0.00000000e+00, 6.19944800e+04, 1.15641280e+05, 9.11312400e+04], [ 1.00000000e+00, 0.00000000e+00, 1.31546000e+03, 1.15816210e+05, 2.97114460e+05], [ 1.00000000e+00, 0.00000000e+00, 9.17491600e+04, 1.14175790e+05, 2.94919570e+05], [ 0.00000000e+00, 1.00000000e+00, 1.65349200e+05, 1.36897800e+05, 4.71784100e+05], [ 0.00000000e+00, 0.00000000e+00, 2.87543300e+04, 1.18546050e+05, 1.72795670e+05], [ 1.00000000e+00, 0.00000000e+00, 9.38637500e+04, 1.27320380e+05, 2.49839440e+05], [ 0.00000000e+00, 1.00000000e+00, 6.56054800e+04, 1.53032060e+05, 1.07138380e+05], [ 0.00000000e+00, 1.00000000e+00, 1.20542520e+05, 1.48718950e+05, 3.11613290e+05]])
y_train
array([ 111313.02, 42559.73, 144259.4 , 77798.83, 156991.12, 191792.06, 156122.51, 105008.31, 14681.4 , 108552.04, 97483.56, 129917.04, 96479.51, 105733.54, 155752.6 , 118474.03, 35673.41, 182901.99, 108733.99, 65200.33, 166187.94, 126992.93, 90708.19, 132602.65, 69758.98, 107404.34, 89949.14, 81005.76, 149759.96, 97427.84, 96778.92, 110352.25, 71498.49, 122776.86, 125370.37, 81229.06, 96712.8 , 103282.38, 191050.39, 64926.08])
y_test
array([ 146121.95, 134307.35, 99937.59, 49490.75, 124266.9 , 192261.83, 78239.91, 141585.52, 101004.64, 152211.77])
# Report the sizes of the four splits (expect 40 train / 10 test rows).
for label, split in (("X_train", X_train), ("X_test", X_test),
                     ("y_train", y_train), ("y_test", y_test)):
    print(f"Length of {label}: ", len(split))
Length of X_train: 40 Length of X_test: 10 Length of y_train: 40 Length of y_test: 10

Fitting Multiple Linear Regression to the training set

# Fit ordinary least squares on the training split; `regressor` is reused
# below to predict on the held-out test set.
regressor = LinearRegression()
regressor.fit(X_train, y_train)
LinearRegression(copy_X=True, fit_intercept=True, n_jobs=1, normalize=False)

Predicting the test set results

# Predict profits for the held-out test rows with the fitted model.
y_pred = regressor.predict(X_test)
y_pred # Predicted result
array([ 133749.91948849, 126771.56418164, 97712.50104996, 58138.82512333, 128196.536732 , 192274.03929242, 75126.75206536, 127984.52000745, 101453.65842153, 151532.50862836])
y_test # Real result
array([ 146121.95, 134307.35, 99937.59, 49490.75, 124266.9 , 192261.83, 78239.91, 141585.52, 101004.64, 152211.77])

Building the optimal model using Backward Elimination

# statsmodels' OLS does not add an intercept automatically, so prepend a
# column of ones for b0 in y = b0 + b1*x1 + ... + bn*xn (i.e. x0 = 1).
# Use X.shape[0] instead of a hard-coded 50 so the cell works for any
# dataset size.
X = np.append(arr = np.ones((X.shape[0], 1)).astype(int), values = X, axis = 1)
X
array([[ 1.00000000e+00, 0.00000000e+00, 1.00000000e+00, 1.65349200e+05, 1.36897800e+05, 4.71784100e+05, 1.00000000e+00], [ 1.00000000e+00, 0.00000000e+00, 0.00000000e+00, 1.62597700e+05, 1.51377590e+05, 4.43898530e+05, 1.00000000e+00], [ 1.00000000e+00, 1.00000000e+00, 0.00000000e+00, 1.53441510e+05, 1.01145550e+05, 4.07934540e+05, 1.00000000e+00], [ 1.00000000e+00, 0.00000000e+00, 1.00000000e+00, 1.44372410e+05, 1.18671850e+05, 3.83199620e+05, 1.00000000e+00], [ 1.00000000e+00, 1.00000000e+00, 0.00000000e+00, 1.42107340e+05, 9.13917700e+04, 3.66168420e+05, 1.00000000e+00], [ 1.00000000e+00, 0.00000000e+00, 1.00000000e+00, 1.31876900e+05, 9.98147100e+04, 3.62861360e+05, 1.00000000e+00], [ 1.00000000e+00, 0.00000000e+00, 0.00000000e+00, 1.34615460e+05, 1.47198870e+05, 1.27716820e+05, 1.00000000e+00], [ 1.00000000e+00, 1.00000000e+00, 0.00000000e+00, 1.30298130e+05, 1.45530060e+05, 3.23876680e+05, 1.00000000e+00], [ 1.00000000e+00, 0.00000000e+00, 1.00000000e+00, 1.20542520e+05, 1.48718950e+05, 3.11613290e+05, 1.00000000e+00], [ 1.00000000e+00, 0.00000000e+00, 0.00000000e+00, 1.23334880e+05, 1.08679170e+05, 3.04981620e+05, 1.00000000e+00], [ 1.00000000e+00, 1.00000000e+00, 0.00000000e+00, 1.01913080e+05, 1.10594110e+05, 2.29160950e+05, 1.00000000e+00], [ 1.00000000e+00, 0.00000000e+00, 0.00000000e+00, 1.00671960e+05, 9.17906100e+04, 2.49744550e+05, 1.00000000e+00], [ 1.00000000e+00, 1.00000000e+00, 0.00000000e+00, 9.38637500e+04, 1.27320380e+05, 2.49839440e+05, 1.00000000e+00], [ 1.00000000e+00, 0.00000000e+00, 0.00000000e+00, 9.19923900e+04, 1.35495070e+05, 2.52664930e+05, 1.00000000e+00], [ 1.00000000e+00, 1.00000000e+00, 0.00000000e+00, 1.19943240e+05, 1.56547420e+05, 2.56512920e+05, 1.00000000e+00], [ 1.00000000e+00, 0.00000000e+00, 1.00000000e+00, 1.14523610e+05, 1.22616840e+05, 2.61776230e+05, 1.00000000e+00], [ 1.00000000e+00, 0.00000000e+00, 0.00000000e+00, 7.80131100e+04, 1.21597550e+05, 2.64346060e+05, 1.00000000e+00], [ 1.00000000e+00, 0.00000000e+00, 
1.00000000e+00, 9.46571600e+04, 1.45077580e+05, 2.82574310e+05, 1.00000000e+00], [ 1.00000000e+00, 1.00000000e+00, 0.00000000e+00, 9.17491600e+04, 1.14175790e+05, 2.94919570e+05, 1.00000000e+00], [ 1.00000000e+00, 0.00000000e+00, 1.00000000e+00, 8.64197000e+04, 1.53514110e+05, 0.00000000e+00, 1.00000000e+00], [ 1.00000000e+00, 0.00000000e+00, 0.00000000e+00, 7.62538600e+04, 1.13867300e+05, 2.98664470e+05, 1.00000000e+00], [ 1.00000000e+00, 0.00000000e+00, 1.00000000e+00, 7.83894700e+04, 1.53773430e+05, 2.99737290e+05, 1.00000000e+00], [ 1.00000000e+00, 1.00000000e+00, 0.00000000e+00, 7.39945600e+04, 1.22782750e+05, 3.03319260e+05, 1.00000000e+00], [ 1.00000000e+00, 1.00000000e+00, 0.00000000e+00, 6.75325300e+04, 1.05751030e+05, 3.04768730e+05, 1.00000000e+00], [ 1.00000000e+00, 0.00000000e+00, 1.00000000e+00, 7.70440100e+04, 9.92813400e+04, 1.40574810e+05, 1.00000000e+00], [ 1.00000000e+00, 0.00000000e+00, 0.00000000e+00, 6.46647100e+04, 1.39553160e+05, 1.37962620e+05, 1.00000000e+00], [ 1.00000000e+00, 1.00000000e+00, 0.00000000e+00, 7.53288700e+04, 1.44135980e+05, 1.34050070e+05, 1.00000000e+00], [ 1.00000000e+00, 0.00000000e+00, 1.00000000e+00, 7.21076000e+04, 1.27864550e+05, 3.53183810e+05, 1.00000000e+00], [ 1.00000000e+00, 1.00000000e+00, 0.00000000e+00, 6.60515200e+04, 1.82645560e+05, 1.18148200e+05, 1.00000000e+00], [ 1.00000000e+00, 0.00000000e+00, 1.00000000e+00, 6.56054800e+04, 1.53032060e+05, 1.07138380e+05, 1.00000000e+00], [ 1.00000000e+00, 1.00000000e+00, 0.00000000e+00, 6.19944800e+04, 1.15641280e+05, 9.11312400e+04, 1.00000000e+00], [ 1.00000000e+00, 0.00000000e+00, 1.00000000e+00, 6.11363800e+04, 1.52701920e+05, 8.82182300e+04, 1.00000000e+00], [ 1.00000000e+00, 0.00000000e+00, 0.00000000e+00, 6.34088600e+04, 1.29219610e+05, 4.60852500e+04, 1.00000000e+00], [ 1.00000000e+00, 1.00000000e+00, 0.00000000e+00, 5.54939500e+04, 1.03057490e+05, 2.14634810e+05, 1.00000000e+00], [ 1.00000000e+00, 0.00000000e+00, 0.00000000e+00, 4.64260700e+04, 
1.57693920e+05, 2.10797670e+05, 1.00000000e+00], [ 1.00000000e+00, 0.00000000e+00, 1.00000000e+00, 4.60140200e+04, 8.50474400e+04, 2.05517640e+05, 1.00000000e+00], [ 1.00000000e+00, 1.00000000e+00, 0.00000000e+00, 2.86637600e+04, 1.27056210e+05, 2.01126820e+05, 1.00000000e+00], [ 1.00000000e+00, 0.00000000e+00, 0.00000000e+00, 4.40699500e+04, 5.12831400e+04, 1.97029420e+05, 1.00000000e+00], [ 1.00000000e+00, 0.00000000e+00, 1.00000000e+00, 2.02295900e+04, 6.59479300e+04, 1.85265100e+05, 1.00000000e+00], [ 1.00000000e+00, 0.00000000e+00, 0.00000000e+00, 3.85585100e+04, 8.29820900e+04, 1.74999300e+05, 1.00000000e+00], [ 1.00000000e+00, 0.00000000e+00, 0.00000000e+00, 2.87543300e+04, 1.18546050e+05, 1.72795670e+05, 1.00000000e+00], [ 1.00000000e+00, 1.00000000e+00, 0.00000000e+00, 2.78929200e+04, 8.47107700e+04, 1.64470710e+05, 1.00000000e+00], [ 1.00000000e+00, 0.00000000e+00, 0.00000000e+00, 2.36409300e+04, 9.61896300e+04, 1.48001110e+05, 1.00000000e+00], [ 1.00000000e+00, 0.00000000e+00, 1.00000000e+00, 1.55057300e+04, 1.27382300e+05, 3.55341700e+04, 1.00000000e+00], [ 1.00000000e+00, 0.00000000e+00, 0.00000000e+00, 2.21777400e+04, 1.54806140e+05, 2.83347200e+04, 1.00000000e+00], [ 1.00000000e+00, 0.00000000e+00, 1.00000000e+00, 1.00023000e+03, 1.24153040e+05, 1.90393000e+03, 1.00000000e+00], [ 1.00000000e+00, 1.00000000e+00, 0.00000000e+00, 1.31546000e+03, 1.15816210e+05, 2.97114460e+05, 1.00000000e+00], [ 1.00000000e+00, 0.00000000e+00, 0.00000000e+00, 0.00000000e+00, 1.35426920e+05, 0.00000000e+00, 1.00000000e+00], [ 1.00000000e+00, 0.00000000e+00, 1.00000000e+00, 5.42050000e+02, 5.17431500e+04, 0.00000000e+00, 1.00000000e+00], [ 1.00000000e+00, 0.00000000e+00, 0.00000000e+00, 0.00000000e+00, 1.16983800e+05, 4.51730600e+04, 1.00000000e+00]])
X_opt = X[:, [0, 1, 2, 3, 4, 5]] # Matrix containing all the independent variables regressor_OLS = sm.OLS(endog = y, exog = X_opt).fit() regressor_OLS.summary()
# Remove index 2 as p_value is highest i.e. p_value>0.05 X_opt = X[:, [0, 1, 3, 4, 5]] # Matrix containing all the independent variables regressor_OLS = sm.OLS(endog = y, exog = X_opt).fit() regressor_OLS.summary()
# Remove index 1 as p_value is highest i.e. p_value>0.05 X_opt = X[:, [0, 3, 4, 5]] # Matrix containing all the independent variables regressor_OLS = sm.OLS(endog = y, exog = X_opt).fit() regressor_OLS.summary()
# Remove index 4 as p_value is highest i.e. p_value>0.05 X_opt = X[:, [0, 3, 5]] # Matrix containing all the independent variables regressor_OLS = sm.OLS(endog = y, exog = X_opt).fit() regressor_OLS.summary()
# Remove index 5 as p_value is highest i.e. p_value>0.05 X_opt = X[:, [0, 3]] # Matrix containing all the independent variables regressor_OLS = sm.OLS(endog = y, exog = X_opt).fit() regressor_OLS.summary()
# The final model was fit on X_opt = [intercept, R&D Spend], but X_test came
# from the 5-column X *before* the intercept was appended — its columns are
# [dummy1, dummy2, R&D, Admin, Marketing]. So X_test[:, [0, 3]] picked a
# state dummy and Administration, which is why the predictions barely tracked
# y_test. Build the matching design matrix explicitly: a ones column plus
# R&D Spend (index 2 of X_test).
X_test_opt = np.column_stack((np.ones(len(X_test)), X_test[:, 2]))
regressor_OLS.predict(X_test_opt)
array([ 143512.49299091, 115752.26910575, 147824.24676957, 147973.68795909, 146572.29130837, 116950.60924051, 101272.86757388, 157801.6011199 , 130733.96833499, 127049.31568008])
y_test
array([ 146121.95, 134307.35, 99937.59, 49490.75, 124266.9 , 192261.83, 78239.91, 141585.52, 101004.64, 152211.77])